/*
 * NeoBio.java
 *
 * Copyright 2003 Sergio Anibal de Carvalho Junior
 *
 * This file is part of NeoBio.
 *
 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with NeoBio;
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Proper attribution of the author as the source of the software would be appreciated.
 *
 * Sergio Anibal de Carvalho Junior		mailto:sergioanibaljr@users.sourceforge.net
 * Department of Computer Science		http://www.dcs.kcl.ac.uk
 * King's College London, UK			http://www.kcl.ac.uk
 *
 * Please visit http://neobio.sourceforge.net
 *
 * This project was supervised by Professor Maxime Crochemore.
 *
 */

package neobio.textui;

import neobio.alignment.*;
import java.io.FileReader;
import java.io.IOException;

/**
 * This class is a simple command line based utility for computing pairwise sequence
 * alignments using one of the algorithms provided in the {@link neobio.alignment}
 * package.
 *
 * <P>The main method takes the following parameters from the command line:
 *
 * <CODE><BLOCKQUOTE>
 * NeoBio &lt;alg&gt; &lt;S1&gt; &lt;S2&gt; [M &lt;matrix&gt; | S &lt;match&gt;
 * &lt;mismatch&gt; &lt;gap&gt;]
 * </BLOCKQUOTE></CODE>
 *
 * <UL>
 * <LI><B><CODE>&lt;alg&gt;</CODE></B> is either <B><CODE>NW</CODE></B> for {@linkplain
 * neobio.alignment.NeedlemanWunsch Needleman &amp; Wunsch} (global alignment),
 * <B><CODE>SW</CODE></B> for {@linkplain neobio.alignment.SmithWaterman Smith &amp; Waterman}
 * (local alignment), <B><CODE>CLZG</CODE></B> for {@linkplain
 * neobio.alignment.CrochemoreLandauZivUkelsonGlobalAlignment Crochemore, Landau &amp;
 * Ziv-Ukelson global alignment} or <B><CODE>CLZL</CODE></B> for {@linkplain
 * neobio.alignment.CrochemoreLandauZivUkelsonLocalAlignment Crochemore, Landau &amp;
 * Ziv-Ukelson local alignment};
 *
 * <LI><B><CODE>&lt;S1&gt;</CODE></B> is the first sequence file;
 *
 * <LI><B><CODE>&lt;S2&gt;</CODE></B> is the second sequence file;
 *
 * <LI><B><CODE>M &lt;matrix&gt;</CODE></B> is for using a scoring matrix file;
 *
 * <LI><B><CODE>S &lt;match&gt; &lt;mismatch&gt; &lt;gap&gt;</CODE></B> is for using a
 * simple scoring scheme, where <B><CODE>&lt;match&gt;</CODE></B> is the match reward
 * value, <B><CODE>&lt;mismatch&gt;</CODE></B> is the mismatch penalty value and
 * <B><CODE>&lt;gap&gt;</CODE></B> is the cost of a gap (linear gap cost function).
 * </UL>
 *
 * <P>When no scoring option is given, a simple scoring scheme with a match reward of 1,
 * a mismatch penalty of -1 and a gap cost of -1 is used.
 *
 * <P>The process exits with status 0 on success, 1 when the command line is malformed
 * (the usage text is printed to the standard error stream) and 2 when an input file
 * cannot be read or is rejected by the alignment package.
 *
 * @author Sergio A. de Carvalho Jr.
 */
public class NeoBio
{
	/** Exit status used when the alignment was computed and printed. */
	private static final int EXIT_SUCCESS = 0;

	/** Exit status used when the command line arguments are malformed. */
	private static final int EXIT_USAGE = 1;

	/** Exit status used when an input file is unreadable or invalid. */
	private static final int EXIT_FAILURE = 2;

	/**
	 * The main method takes parameters from the command line to compute a pairwise
	 * sequence alignment. See the class description for details.
	 *
	 * <P>Progress and timing information is written to the standard error stream; only
	 * the resulting alignment is written to the standard output stream. This method
	 * always terminates the JVM through {@link System#exit(int)}.
	 *
	 * @param args command line arguments
	 */
	public static void main (String args[])
	{
		PairwiseAlignmentAlgorithm	algorithm;
		ScoringScheme				scoring;
		PairwiseAlignment			alignment;
		FileReader					seq1 = null, seq2 = null;
		long						start, elapsed;

		// <alg>, <S1> and <S2> are mandatory
		if (args.length < 3)
		{
			exitWithUsage();
			return;
		}

		// create an instance of the requested algorithm
		algorithm = createAlgorithm (args[0]);

		if (algorithm == null)
		{
			exitWithUsage();
			return;
		}

		try
		{
			scoring = createScoringScheme (args);
		}
		catch (InvalidScoringMatrixException e)
		{
			System.err.println(e.getMessage());
			System.exit(EXIT_FAILURE);
			return;
		}
		catch (IOException e)
		{
			System.err.println(e.getMessage());
			System.exit(EXIT_FAILURE);
			return;
		}

		if (scoring == null)
		{
			exitWithUsage();
			return;
		}

		// set scoring scheme
		algorithm.setScoringScheme(scoring);

		try
		{
			// load sequences
			System.err.println("\nLoading sequences...");

			try
			{
				seq1 = new FileReader(args[1]);
				seq2 = new FileReader(args[2]);

				start = System.currentTimeMillis();
				algorithm.loadSequences(seq1, seq2);
				elapsed = System.currentTimeMillis() - start;

				// close files; a failure here is still reported to the user
				seq1.close();
				seq2.close();
			}
			finally
			{
				// safety net for the error paths (second file missing, invalid
				// sequence, ...); closing an already closed reader has no effect
				closeQuietly (seq1);
				closeQuietly (seq2);
			}

			System.err.println("[ Elapsed time: " + elapsed + " milliseconds ]\n");

			// compute alignment
			System.err.println("Computing alignment...");

			start = System.currentTimeMillis();
			alignment = algorithm.getPairwiseAlignment();
			elapsed = System.currentTimeMillis() - start;

			System.err.println("[ Elapsed time: " + elapsed + " milliseconds ]\n");

			System.out.println("Alignment:\n" + alignment);
		}
		catch (InvalidSequenceException e)
		{
			System.err.println("Invalid sequence file.");
			System.exit(EXIT_FAILURE);
			return;
		}
		catch (IncompatibleScoringSchemeException e)
		{
			System.err.println("Incompatible scoring scheme.");
			System.exit(EXIT_FAILURE);
			return;
		}
		catch (IOException e)
		{
			System.err.println(e.getMessage());
			System.exit(EXIT_FAILURE);
			return;
		}

		System.exit(EXIT_SUCCESS);
	}

	/**
	 * Maps the <CODE>&lt;alg&gt;</CODE> command line argument to a new algorithm
	 * instance. The comparison is case-insensitive.
	 *
	 * @param name algorithm code (<CODE>NW</CODE>, <CODE>SW</CODE>, <CODE>CLZG</CODE> or
	 * <CODE>CLZL</CODE>)
	 * @return a new instance of the requested algorithm, or <CODE>null</CODE> if the
	 * code is not recognised
	 */
	private static PairwiseAlignmentAlgorithm createAlgorithm (String name)
	{
		if (name.equalsIgnoreCase("nw"))
			return new NeedlemanWunsch();

		if (name.equalsIgnoreCase("sw"))
			return new SmithWaterman();

		if (name.equalsIgnoreCase("clzg"))
			return new CrochemoreLandauZivUkelsonGlobalAlignment();

		if (name.equalsIgnoreCase("clzl"))
			return new CrochemoreLandauZivUkelsonLocalAlignment();

		return null;
	}

	/**
	 * Builds the scoring scheme selected by the optional arguments that follow the two
	 * sequence file names (index 3 onwards).
	 *
	 * <P>Argument counts are checked explicitly rather than by catching
	 * <CODE>ArrayIndexOutOfBoundsException</CODE>, so that an index error raised inside
	 * the alignment package is not mistaken for a malformed command line.
	 *
	 * @param args the complete command line; must hold at least three elements
	 * @return the default scheme (1, -1, -1) when no scoring option is present, the
	 * requested scheme when the option is well formed, or <CODE>null</CODE> when the
	 * option is unknown, incomplete or contains a non-integer value
	 * @throws IOException if the scoring matrix file cannot be opened or read
	 * @throws InvalidScoringMatrixException if the scoring matrix file is rejected
	 */
	private static ScoringScheme createScoringScheme (String[] args)
		throws IOException, InvalidScoringMatrixException
	{
		String	scoring_type;
		int		match, mismatch, gap;

		// not specified: use default scoring scheme
		if (args.length < 4)
			return new BasicScoringScheme (1, -1, -1);

		scoring_type = args[3];

		if (scoring_type.equalsIgnoreCase("M"))
		{
			if (args.length < 5)
				return null;

			// use scoring matrix
			FileReader matrix = new FileReader(args[4]);

			try
			{
				return new ScoringMatrix (matrix);
			}
			finally
			{
				// NOTE(review): assumes the constructor reads the whole matrix
				// before returning (it is the call that reports an invalid matrix),
				// so the reader is no longer needed - confirm against ScoringMatrix
				closeQuietly (matrix);
			}
		}

		if (scoring_type.equalsIgnoreCase("S"))
		{
			if (args.length < 7)
				return null;

			try
			{
				// use basic scoring scheme
				match		= Integer.parseInt(args[4]);
				mismatch	= Integer.parseInt(args[5]);
				gap			= Integer.parseInt(args[6]);

				return new BasicScoringScheme (match, mismatch, gap);
			}
			catch (NumberFormatException e)
			{
				return null;
			}
		}

		// unknown scoring option
		return null;
	}

	/**
	 * Prints the usage text and terminates the JVM with the usage error status.
	 */
	private static void exitWithUsage ()
	{
		usage();
		System.exit(EXIT_USAGE);
	}

	/**
	 * Closes a reader, ignoring any failure to do so.
	 *
	 * @param reader the reader to close; may be <CODE>null</CODE>
	 */
	private static void closeQuietly (FileReader reader)
	{
		if (reader == null)
			return;

		try
		{
			reader.close();
		}
		catch (IOException ignored)
		{
			// best effort only: the reader was used for input, and a more relevant
			// error (if any) is already on its way to the caller
		}
	}

	/**
	 * Prints command line usage to the standard error stream.
	 */
	public static void usage ()
	{
		System.err.println(
		"\nUsage: NeoBio <alg> <S1> <S2> [M <matrix> | S <match> <mismatch> <gap>]\n\n" +
		"where:\n\n" +
		" <alg> = NW for Needleman & Wunsch (global alignment)\n" +
		" or SW for Smith & Waterman (local alignment)\n" +
		" or CLZG for Crochemore, Landau & Ziv-Ukelson global alignment\n" +
		" or CLZL for Crochemore, Landau & Ziv-Ukelson local alignment\n\n" +
		" <S1> = first sequence file\n\n" +
		" <S2> = second sequence file\n\n" +
		" M <matrix> for using a scoring matrix file\n\n" +
		"or\n\n" +
		" S <match> <mismatch> <gap> for using a simple scoring scheme\n" +
		" where <match> = match reward value\n" +
		" <mismatch> = mismatch penalty value\n" +
		" <gap> = cost of a gap (linear gap cost function)"
		);
	}
}